import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import pandas as pd
import pickle
import helper
from IPython.display import display, HTML
import seaborn as sns
# Load the full dataset through the project helper and display its
# (rows, columns) shape as the cell result.
df_orig = helper.get_data()
df_orig.shape
(18110, 20)
# Pick one clustering run out of the loaded data. The note below lists the
# known run indices with the number of clusters each one produced; idx 41 is
# the 25-cluster run used throughout this notebook.
# 21 (18 clusters), 28 (22 clusters), 41 (25 clusters) - dataset1 (without 01)
df = helper.get_cluster_df(df_orig, idx = 41, expected_cluster = 25)
/home/nandy/repository/bit/agp/helper.py:110: FutureWarning: Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.
df_fossil = df_fossil.groupby(['clust'])['fossil','green'].sum().sort_values('fossil', ascending = False)
# Generation-mix columns summarised per cluster in the tables that follow.
fuel_cols = [
    'wind', 'solar', 'solar_rooftop',
    'blackcoal', 'browncoal', 'gas', 'diesel', 'kerosene',
    'hydro', 'bagasse', 'batterys',
    'net_import',
]
# Non-fuel columns summarised per cluster separately.
additional_cols = ['demand', 'rrp', 'export']

# Relax pandas' display limits so wide/long summary frames are not truncated.
for option, value in (
    ('display.max_columns', 200),
    ('display.max_rows', 100),
    ('display.min_rows', 100),
    ('display.expand_frame_repr', True),
):
    pd.set_option(option, value)
# Per-cluster mean and standard deviation of every fuel column, rounded to
# two decimals and rendered as an HTML table without the row index.
X = (
    df[['clust'] + fuel_cols]
    .groupby(['clust'])
    .agg(['mean', 'std'])
    .reset_index()
)
X = X.apply(lambda col: round(col, 2))
X = X.rename(columns={'clust': 'cluster'})

# Temporarily lift the row/column display limits so the full table is shown.
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    table_html = X.to_html(index=False)
    display(HTML(table_html))
| cluster | wind | solar | solar_rooftop | blackcoal | browncoal | gas | diesel | kerosene | hydro | bagasse | batterys | net_import | ||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| mean | std | mean | std | mean | std | mean | std | mean | std | mean | std | mean | std | mean | std | mean | std | mean | std | mean | std | mean | std | |
| 1 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 78.83 | 4.50 | 0.00 | 0.00 | 19.15 | 4.42 | 0.00 | 0.00 | 0.00 | 0.01 | 1.37 | 0.75 | 0.63 | 0.54 | 0.00 | 0.00 | 0.02 | 0.12 |
| 2 | 0.00 | 0.00 | 0.01 | 0.04 | 1.17 | 1.97 | 78.62 | 4.62 | 0.00 | 0.00 | 17.49 | 4.95 | 0.00 | 0.00 | 0.42 | 0.47 | 1.41 | 0.88 | 0.43 | 0.41 | 0.00 | 0.00 | 0.44 | 0.88 |
| 3 | 4.49 | 3.47 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 88.14 | 5.35 | 2.38 | 2.41 | 0.00 | 0.00 | 0.00 | 0.00 | 4.96 | 3.51 | 0.00 | 0.00 | 0.00 | 0.00 | 0.02 | 0.12 |
| 4 | 0.73 | 1.00 | 2.38 | 2.18 | 5.26 | 1.68 | 79.80 | 4.98 | 0.00 | 0.00 | 10.01 | 3.51 | 0.00 | 0.00 | 0.00 | 0.00 | 1.40 | 0.71 | 0.40 | 0.35 | 0.00 | 0.00 | 0.01 | 0.07 |
| 5 | 0.79 | 0.59 | 3.19 | 1.50 | 5.04 | 1.60 | 78.13 | 4.33 | 0.00 | 0.00 | 10.14 | 2.74 | 0.00 | 0.01 | 0.18 | 0.18 | 1.83 | 0.76 | 0.30 | 0.27 | 0.00 | 0.00 | 0.40 | 0.99 |
| 6 | 2.09 | 1.82 | 0.10 | 0.26 | 0.24 | 0.69 | 80.49 | 4.31 | 0.00 | 0.00 | 5.65 | 2.16 | 0.35 | 0.07 | 0.00 | 0.00 | 3.92 | 2.07 | 0.00 | 0.00 | 0.00 | 0.00 | 7.15 | 4.02 |
| 7 | 1.54 | 1.49 | 0.14 | 0.27 | 0.00 | 0.00 | 78.84 | 6.25 | 0.00 | 0.00 | 5.24 | 1.95 | 0.00 | 0.00 | 0.00 | 0.00 | 3.50 | 2.37 | -0.00 | 0.00 | 0.00 | 0.00 | 10.73 | 5.41 |
| 8 | 8.17 | 5.54 | 0.03 | 0.06 | 2.76 | 1.19 | 0.00 | 0.00 | 79.75 | 6.16 | 4.25 | 4.10 | 0.00 | 0.00 | 0.00 | 0.00 | 4.99 | 3.61 | 0.00 | 0.00 | 0.00 | 0.00 | 0.06 | 0.26 |
| 9 | 5.35 | 4.21 | 0.04 | 0.09 | 1.83 | 1.89 | 0.00 | 0.00 | 74.95 | 8.41 | 6.25 | 4.54 | 0.00 | 0.00 | 0.00 | 0.00 | 6.61 | 3.55 | 0.00 | 0.00 | 0.00 | 0.00 | 4.97 | 2.79 |
| 10 | 4.62 | 3.00 | 1.61 | 0.93 | 3.42 | 1.46 | 77.82 | 6.27 | 0.00 | 0.00 | 2.42 | 2.15 | 0.00 | 0.00 | 0.00 | 0.00 | 3.59 | 1.97 | -0.00 | 0.00 | 0.00 | 0.00 | 6.52 | 4.44 |
| 11 | 3.63 | 2.53 | 1.06 | 0.78 | 2.38 | 1.83 | 72.08 | 7.32 | 0.00 | 0.00 | 5.10 | 2.52 | 0.01 | 0.02 | 0.03 | 0.03 | 5.32 | 3.00 | 0.00 | 0.00 | 0.00 | 0.00 | 10.38 | 5.08 |
| 12 | 12.42 | 6.78 | 1.34 | 0.57 | 5.20 | 1.97 | 0.00 | 0.00 | 73.94 | 5.84 | 3.22 | 3.11 | 0.00 | 0.00 | 0.00 | 0.00 | 3.71 | 2.72 | 0.00 | 0.00 | 0.04 | 0.02 | 0.13 | 0.39 |
| 13 | 8.97 | 6.13 | 1.21 | 0.53 | 4.03 | 1.93 | 0.00 | 0.00 | 65.70 | 6.69 | 7.42 | 4.37 | 0.00 | 0.00 | 0.00 | 0.00 | 6.04 | 3.52 | 0.00 | 0.00 | 0.05 | 0.02 | 6.57 | 3.51 |
| 14 | 15.32 | NaN | 0.00 | NaN | 0.00 | NaN | 0.00 | NaN | 26.49 | NaN | 39.15 | NaN | 1.43 | NaN | 0.00 | NaN | 0.00 | NaN | 0.00 | NaN | 0.00 | NaN | 17.61 | NaN |
| 15 | 23.52 | 14.44 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 18.60 | 10.41 | 43.26 | 11.31 | 0.17 | 0.18 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 14.45 | 11.01 |
| 16 | 29.40 | 16.76 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 19.39 | 10.28 | 39.83 | 11.62 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 11.38 | 10.34 |
| 17 | 23.41 | 12.20 | 1.04 | 1.46 | 7.96 | 5.11 | 0.00 | 0.00 | 0.27 | 1.91 | 56.40 | 11.38 | 0.96 | 0.86 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.01 | 9.96 | 9.96 |
| 18 | 25.91 | 12.02 | 3.40 | 1.92 | 11.61 | 5.36 | 0.00 | 0.00 | 0.00 | 0.00 | 53.14 | 10.79 | 0.41 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.11 | 0.06 | 5.41 | 6.89 |
| 19 | 26.18 | 13.85 | 3.17 | 1.64 | 11.34 | 4.99 | 0.00 | 0.00 | 0.00 | 0.00 | 47.34 | 14.49 | 0.01 | 0.02 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.04 | 0.05 | 11.93 | 7.72 |
| 20 | 34.54 | 18.35 | 0.00 | 0.02 | 6.63 | 4.09 | 0.00 | 0.00 | 0.00 | 0.00 | 46.28 | 15.15 | 0.04 | 0.06 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 12.51 | 12.86 |
| 21 | 45.75 | 18.08 | 2.60 | 1.42 | 9.62 | 4.43 | 0.00 | 0.00 | 0.00 | 0.00 | 41.92 | 16.75 | 0.02 | 0.04 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.04 | 0.05 | 0.06 | 0.26 |
| 22 | 7.43 | 5.49 | 0.00 | 0.00 | 0.76 | 0.88 | 0.00 | 0.00 | 0.00 | 0.00 | 17.05 | 3.96 | 0.00 | 0.00 | 0.00 | 0.00 | 60.40 | 14.24 | 0.00 | 0.00 | 0.00 | 0.00 | 14.35 | 12.31 |
| 23 | 5.10 | 4.08 | 0.00 | 0.00 | 0.00 | 0.01 | 0.00 | 0.00 | 0.00 | 0.00 | 14.06 | 3.67 | 0.00 | 0.00 | 0.00 | 0.00 | 80.78 | 6.10 | 0.00 | 0.00 | 0.00 | 0.00 | 0.06 | 0.19 |
| 24 | 10.09 | 6.42 | 0.00 | 0.00 | 0.89 | 1.10 | 0.00 | 0.00 | 0.00 | 0.00 | 0.72 | 1.33 | 0.00 | 0.00 | 0.00 | 0.00 | 66.17 | 13.98 | 0.00 | 0.00 | 0.00 | 0.00 | 22.13 | 12.69 |
| 25 | 9.04 | 5.53 | 0.00 | 0.00 | 0.63 | 0.76 | 0.00 | 0.00 | 0.00 | 0.00 | 0.63 | 1.07 | 0.00 | 0.00 | 0.00 | 0.00 | 89.67 | 5.64 | 0.00 | 0.00 | 0.00 | 0.00 | 0.04 | 0.14 |
# Per-cluster mean and standard deviation of the additional (non-fuel)
# columns, rounded to two decimals and rendered as an HTML table.
X = (
    df[['clust'] + additional_cols]
    .groupby(['clust'])
    .agg(['mean', 'std'])
    .reset_index()
)
X = X.apply(lambda col: round(col, 2))
X = X.rename(columns={'clust': 'cluster'})

# Temporarily lift the row/column display limits so the full table is shown.
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    table_html = X.to_html(index=False)
    display(HTML(table_html))
| cluster | demand | rrp | export | |||
|---|---|---|---|---|---|---|
| mean | std | mean | std | mean | std | |
| 1 | 141655.59 | 9309.28 | 42.53 | 19.94 | 12087.25 | 7244.41 |
| 2 | 160156.66 | 15318.19 | 126.29 | 193.69 | 6700.23 | 7075.08 |
| 3 | 133180.33 | 13331.62 | 39.22 | 22.64 | 15605.51 | 9717.94 |
| 4 | 159693.26 | 12114.22 | 67.13 | 22.97 | 14572.51 | 6711.84 |
| 5 | 164824.64 | 14676.17 | 76.90 | 31.69 | 11811.98 | 8331.80 |
| 6 | 191580.08 | 14076.02 | 57.18 | 12.64 | 29.39 | 251.10 |
| 7 | 196706.05 | 17922.68 | 43.43 | 21.57 | 19.89 | 293.02 |
| 8 | 125145.06 | 12425.02 | 79.64 | 50.38 | 13593.15 | 11994.16 |
| 9 | 138416.17 | 14691.02 | 88.30 | 58.44 | 27.44 | 428.13 |
| 10 | 199191.90 | 17425.48 | 75.52 | 26.39 | 404.01 | 1658.92 |
| 11 | 211972.99 | 24378.54 | 177.08 | 277.43 | 74.12 | 741.23 |
| 12 | 123318.78 | 11571.64 | 69.39 | 63.42 | 7571.75 | 6732.44 |
| 13 | 132259.72 | 12770.56 | 115.85 | 235.86 | 56.16 | 627.12 |
| 14 | 33977.00 | NaN | 52.56 | NaN | 0.00 | NaN |
| 15 | 39485.87 | 5907.59 | 84.83 | 73.71 | 373.70 | 1183.43 |
| 16 | 36690.27 | 4598.20 | 42.24 | 20.08 | 533.97 | 1360.94 |
| 17 | 42278.96 | 8075.17 | 312.11 | 462.70 | 867.01 | 2072.90 |
| 18 | 44377.62 | 10978.07 | 198.23 | 246.10 | 554.26 | 1344.91 |
| 19 | 37703.66 | 4934.91 | 78.30 | 31.22 | 0.00 | 0.00 |
| 20 | 36698.74 | 4364.18 | 89.77 | 31.91 | 1779.57 | 3172.63 |
| 21 | 37465.38 | 3841.35 | 64.76 | 39.39 | 4455.69 | 3112.78 |
| 22 | 28230.67 | 2203.46 | 79.72 | 57.92 | 553.48 | 1915.27 |
| 23 | 29183.58 | 2657.03 | 49.02 | 40.28 | 5427.92 | 3970.16 |
| 24 | 28559.48 | 2048.81 | 62.29 | 34.11 | 0.00 | 0.00 |
| 25 | 31036.14 | 2270.82 | 55.85 | 33.62 | 5248.21 | 3763.37 |
# Report how many rows fall into each cluster (count and percentage of all rows).
# NOTE(review): reorder=True presumably sorts clusters by size, which matches
# the descending counts in the recorded output -- confirm in helper.
helper.plot_cluster_percentages(df, reorder = True)
| clust | count | percentage |
|---|---|---|
| 7 | 2120 | 11.71 |
| 3 | 2056 | 11.35 |
| 1 | 1928 | 10.65 |
| 16 | 1785 | 9.86 |
| 10 | 1332 | 7.36 |
| 4 | 1254 | 6.92 |
| 25 | 1224 | 6.76 |
| 23 | 859 | 4.74 |
| 24 | 782 | 4.32 |
| 22 | 757 | 4.18 |
| 20 | 620 | 3.42 |
| 21 | 538 | 2.97 |
| 8 | 475 | 2.62 |
| 12 | 439 | 2.42 |
| 2 | 380 | 2.10 |
| 9 | 366 | 2.02 |
| 15 | 323 | 1.78 |
| 13 | 286 | 1.58 |
| 19 | 238 | 1.31 |
| 17 | 101 | 0.56 |
| 11 | 100 | 0.55 |
| 6 | 73 | 0.40 |
| 5 | 57 | 0.31 |
| 18 | 16 | 0.09 |
| 14 | 1 | 0.01 |
# Cross-tabulate clusters against states and print the resulting table
# (the recorded output has one row per cluster and one column per state).
t = helper.get_cluster_state_shares(df)
print(t)
state NSW1 QLD1 SA1 TAS1 VIC1 clust 1 0.0 1928.0 0.0 0.0 0.0 2 0.0 380.0 0.0 0.0 0.0 3 0.0 0.0 0.0 0.0 2056.0 4 1.0 1253.0 0.0 0.0 0.0 5 0.0 57.0 0.0 0.0 0.0 6 73.0 0.0 0.0 0.0 0.0 7 2118.0 2.0 0.0 0.0 0.0 8 0.0 0.0 0.0 0.0 475.0 9 0.0 0.0 0.0 0.0 366.0 10 1330.0 2.0 0.0 0.0 0.0 11 100.0 0.0 0.0 0.0 0.0 12 0.0 0.0 0.0 0.0 439.0 13 0.0 0.0 0.0 0.0 286.0 14 0.0 0.0 1.0 0.0 0.0 15 0.0 0.0 323.0 0.0 0.0 16 0.0 0.0 1785.0 0.0 0.0 17 0.0 0.0 101.0 0.0 0.0 18 0.0 0.0 16.0 0.0 0.0 19 0.0 0.0 238.0 0.0 0.0 20 0.0 0.0 620.0 0.0 0.0 21 0.0 0.0 538.0 0.0 0.0 22 0.0 0.0 0.0 757.0 0.0 23 0.0 0.0 0.0 859.0 0.0 24 0.0 0.0 0.0 782.0 0.0 25 0.0 0.0 0.0 1224.0 0.0
# Build a (state, month) x (year, day) table of the cluster assigned to each day.
# Take an explicit copy so later column assignments never target a slice of
# `df` (avoids pandas' SettingWithCopyWarning).
X = df[['state', 'year', 'month', 'day', 'clust', 'probs']].copy()
X = helper.get_cluster_assignment_day(X)

# Replace the numeric month (1-12) with an ordered categorical of month
# abbreviations so that the pivot rows sort in calendar order, not alphabetically.
months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sept', 'Oct', 'Nov', 'Dec']
# Plain column assignment swaps in the new categorical column. The previous
# `X.loc[:, 'month'] = ...` form is applied in place on pandas >= 2.0, which
# may keep a non-categorical dtype and lose the calendar ordering.
X['month'] = pd.Categorical(
    X['month'].map(lambda x: months[x - 1]),
    categories=months,
    ordered=True,
)
X = X.pivot_table(index=['state', 'month'], columns=['year', "day"], values="clust")
# One heatmap panel per year for NSW1: rows are months, columns are days, and
# each cell is annotated with the pivoted 'clust' value for that day.
years = list(range(2010, 2021))
month_labels = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sept', 'Oct', 'Nov', 'Dec']
fig, ax = plt.subplots(len(years), 1, figsize=(10, 48))
for idx, year in enumerate(years):
    d = X.loc['NSW1'][year]
    g = sns.heatmap(
        d, annot=True, vmin=1, vmax=25,
        yticklabels=month_labels,
        linewidths=.5, cmap="YlGnBu", ax=ax[idx], cbar=False,
        mask=d.isnull(),  # leave cells without a value blank
    )
    # Keep the month labels horizontal and small enough to fit the panel.
    g.set_yticklabels(g.get_yticklabels(), rotation=0, fontsize=8)
    ax[idx].set_xlabel('Time (in days)', fontsize=12)
    ax[idx].set_ylabel(str(year), fontsize=12)
# Variant of the per-year NSW1 heatmaps that removes incomplete rows instead
# of masking missing cells; y tick labels come from the remaining index.
# NOTE(review): dropna() with its defaults removes every month row that has at
# least one missing day -- confirm that this (rather than how='all') is intended.
years = list(range(2010, 2021))
fig, ax = plt.subplots(len(years), 1, figsize=(10, 48))
for idx, year in enumerate(years):
    d = X.loc['NSW1'][year].dropna()
    g = sns.heatmap(
        d, annot=True, vmin=1, vmax=25,
        linewidths=.5, cmap="YlGnBu", ax=ax[idx], cbar=False,
    )
    # Keep the row labels horizontal and small enough to fit the panel.
    g.set_yticklabels(g.get_yticklabels(), rotation=0, fontsize=8)
    ax[idx].set_xlabel('Time (in days)', fontsize=12)
    ax[idx].set_ylabel(str(year), fontsize=12)
# Preview the first rows of the current pivot table held in X.
# NOTE(review): the recorded output below has numeric months and year-only
# columns, which looks like the monthly pivot rather than the daily one --
# this cell was probably last run out of order; re-run to confirm.
X.head()
| year | 2010 | 2011 | 2012 | 2013 | 2014 | 2015 | 2016 | 2017 | 2018 | 2019 | 2020 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| state | month | |||||||||||
| NSW1 | 1 | NaN | 7.0 | 7.0 | 7.0 | 7.0 | 7.0 | 7.0 | 10.0 | 10.0 | 10.0 | 10.0 |
| 2 | NaN | 7.0 | 7.0 | 7.0 | 7.0 | 7.0 | 7.0 | 10.0 | 10.0 | 10.0 | 10.0 | |
| 3 | NaN | 7.0 | 7.0 | 7.0 | 7.0 | 7.0 | 7.0 | 10.0 | 10.0 | 10.0 | 10.0 | |
| 4 | NaN | 7.0 | 7.0 | 7.0 | 7.0 | 7.0 | 7.0 | 10.0 | 10.0 | 10.0 | 10.0 | |
| 5 | NaN | 7.0 | 7.0 | 7.0 | 7.0 | 7.0 | 7.0 | 10.0 | 10.0 | 10.0 | 10.0 |
# Heatmap of the cluster assigned to each (state, month) row across years.
# Copy the column subset before handing it to the helper: passing the bare
# slice of `df` is what triggers the SettingWithCopyWarning recorded in this
# cell's output (presumably when the helper adds or overwrites columns).
X = df[['datetime', 'state', 'year', 'month', 'clust', 'probs']].copy()
X = helper.get_cluster_assignment_month(X)
print('unique clusters {}'.format(np.sort(X['clust'].unique())))

# Rows: (state, month); columns: year; values: the pivoted 'clust' column.
X = X.pivot_table(index=['state', 'month'], columns="year", values="clust")

fig, ax = plt.subplots(figsize=(10, 20))
# vmin/vmax pin the colour scale to cluster ids 1..25 (the 25-cluster run).
sns.heatmap(X, annot=True, vmin=1, vmax=25,
            linewidths=.5, cmap="YlGnBu", ax=ax, cbar=False)
plt.ylabel('State - Month', fontsize=12)
plt.xlabel('Time (in years)', fontsize=12)
/home/nandy/repository/git/p3_venv/lib/python3.6/site-packages/pandas/core/indexing.py:1596: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy self.obj[key] = _infer_fill_value(value) /home/nandy/repository/git/p3_venv/lib/python3.6/site-packages/pandas/core/indexing.py:1743: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy isetter(ilocs[0], value)
unique clusters [ 1 2 3 4 6 7 8 9 10 11 12 13 16 17 19 20 21 22 23 24 25]
Text(0.5, 159.0, 'Time (in years)')
# Demand over time for every row of df, coloured by cluster with one marker
# shape per state.
fig, ax = plt.subplots(figsize=(10, 5))
X = df.copy()
mkr_dict = {'NSW1': 'D', 'QLD1': 'v', 'SA1': 'o', 'VIC1': '^', 'TAS1': '*'}
g = sns.scatterplot(x='datetime', y='demand', hue='clust_category', style='state', s=30,
                    markers=mkr_dict, data=X, alpha=0.5, legend='brief')

# Rebuild the legend without entries 0 and 26.
# NOTE(review): those are presumably the 'clust_category' and 'state' section
# headers seaborn inserts for legend='brief' with 25 clusters -- re-check the
# hard-coded indices if the cluster count or seaborn version changes.
handles, labels = ax.get_legend_handles_labels()
labels = labels[1:26] + labels[27:]
handles = handles[1:26] + handles[27:]
g.legend(handles=handles, labels=labels, fontsize=12,
         loc=4, ncol=8, bbox_to_anchor=(1.05, -0.6), title='Clusters/State', title_fontsize=12)

# Set x-axis label and tick label size
plt.xlabel('Time (in days)', fontsize=14)
a = plt.xticks(fontsize=14)
# Set y-axis label and tick label size (this was a second plt.xticks call,
# which left the y tick labels at their default size)
plt.ylabel('Demand (MWh)', fontsize=14)
a = plt.yticks(fontsize=14)
# Demand scatter built from the helper's month-level table, coloured by
# cluster with one marker shape per state.
t = helper.get_additional_col_month(df, 'demand')
fig, ax = plt.subplots(figsize=(10, 5))
X = t.copy()
mkr_dict = {'NSW1': 'D', 'QLD1': 'v', 'SA1': 'o', 'VIC1': '^', 'TAS1': '*'}
g = sns.scatterplot(x='datetime', y='demand', hue='clust_category', style='state', s=50,
                    markers=mkr_dict, data=X, alpha=0.5, legend='brief')

# Rebuild the legend without entries 0 and 22.
# NOTE(review): those are presumably the 'clust_category' and 'state' section
# headers seaborn inserts for legend='brief' (21 clusters appear in this
# table) -- re-check the hard-coded indices if the data or seaborn changes.
handles, labels = ax.get_legend_handles_labels()
labels = labels[1:22] + labels[23:]
handles = handles[1:22] + handles[23:]
g.legend(handles=handles, labels=labels, fontsize=12,
         loc=4, ncol=8, bbox_to_anchor=(1.05, -0.6), title='Clusters/State', title_fontsize=12)

# Set x-axis label and tick label size
plt.xlabel('Time (in days)', fontsize=14)
a = plt.xticks(fontsize=14)
# Set y-axis label and tick label size (this was a second plt.xticks call,
# which left the y tick labels at their default size)
plt.ylabel('Demand (MWh)', fontsize=14)
a = plt.yticks(fontsize=14)
# RRP over time restricted to rows with rrp > 150, coloured by cluster with
# one marker shape per state.
fig, ax = plt.subplots(figsize=(10, 5))
X = df[df.rrp > 150]
mkr_dict = {'NSW1': 'D', 'QLD1': 'v', 'SA1': 'o', 'VIC1': '^', 'TAS1': '*'}
g = sns.scatterplot(x='datetime', y='rrp', hue='clust_category', style='state', s=50,
                    markers=mkr_dict, data=X, alpha=0.8, legend='brief')

# Rebuild the legend without entries 0 and 26.
# NOTE(review): those are presumably the 'clust_category' and 'state' section
# headers seaborn inserts for legend='brief' with 25 clusters -- re-check the
# hard-coded indices if the cluster count or seaborn version changes.
handles, labels = ax.get_legend_handles_labels()
labels = labels[1:26] + labels[27:]
handles = handles[1:26] + handles[27:]
g.legend(handles=handles, labels=labels, fontsize=12,
         loc=4, ncol=8, bbox_to_anchor=(1.05, -0.6), title='Clusters/State', title_fontsize=12)

# Set x-axis label and tick label size
plt.xlabel('Time (in days)', fontsize=14)
a = plt.xticks(fontsize=14)
# Set y-axis label and tick label size (this was a second plt.xticks call,
# which left the y tick labels at their default size)
plt.ylabel('RRP', fontsize=14)
a = plt.yticks(fontsize=14)
# RRP scatter built from the helper's month-level table, coloured by cluster
# with one marker shape per state.
t = helper.get_additional_col_month(df, 'rrp')
fig, ax = plt.subplots(figsize=(10, 5))
X = t.copy()
mkr_dict = {'NSW1': 'D', 'QLD1': 'v', 'SA1': 'o', 'VIC1': '^', 'TAS1': '*'}
g = sns.scatterplot(x='datetime', y='rrp', hue='clust_category', style='state', s=30,
                    markers=mkr_dict, data=X, alpha=0.5, legend='brief')

# Rebuild the legend without entries 0 and 22.
# NOTE(review): those are presumably the 'clust_category' and 'state' section
# headers seaborn inserts for legend='brief' (21 clusters appear in this
# table) -- re-check the hard-coded indices if the data or seaborn changes.
handles, labels = ax.get_legend_handles_labels()
labels = labels[1:22] + labels[23:]
handles = handles[1:22] + handles[23:]
g.legend(handles=handles, labels=labels, fontsize=12,
         loc=4, ncol=8, bbox_to_anchor=(1.05, -0.6), title='Clusters/State', title_fontsize=12)

# Set x-axis label and tick label size
plt.xlabel('Time (in days)', fontsize=14)
a = plt.xticks(fontsize=14)
# Set y-axis label and tick label size (this was a second plt.xticks call,
# which left the y tick labels at their default size)
plt.ylabel('RRP', fontsize=14)
a = plt.yticks(fontsize=14)
# Export over time, one stacked panel per state. Points are coloured by
# cluster and sized by the number of rows in their cluster.
fig, ax = plt.subplots(5, 1, figsize=(10, 25))
# Attach each row's cluster size as a 'count' column.
X = pd.merge(df, df.groupby(['clust']).size().reset_index(name='count'), on='clust')
mkr_dict = {'NSW1': 'D', 'QLD1': 'v', 'SA1': 'o', 'VIC1': '^', 'TAS1': '*'}

panel_states = ['NSW1', 'QLD1', 'VIC1', 'SA1', 'TAS1']
last_panel = len(panel_states) - 1
for panel, state_name in enumerate(panel_states):
    is_last = panel == last_panel
    # Only the bottom panel draws a legend; the upper panels suppress theirs.
    g = sns.scatterplot(x='datetime', y='export', hue='clust_category',
                        style='state', size='count', ax=ax[panel],
                        markers=mkr_dict, data=X[X.state == state_name], alpha=0.8,
                        legend='brief' if is_last else False)
    ax[panel].set_ylabel('Export - {}'.format(state_name), fontsize=14)
    if not is_last:
        # The upper panels get an empty x-axis label.
        ax[panel].set_xlabel('')

# Keep legend entries 1..25 of the bottom panel only.
# NOTE(review): presumably these are the 25 cluster entries, with the section
# header at index 0 and the size/state entries after index 25 dropped.
handles, labels = ax[4].get_legend_handles_labels()
labels = labels[1:26]
handles = handles[1:26]
g.legend(handles=handles, labels=labels, fontsize=12,
         loc=4, ncol=8, bbox_to_anchor=(1.0, -0.7), title='Clusters/State', title_fontsize=12)

# Set x-axis label
plt.xlabel('Time (in days)', fontsize=14)
a = plt.xticks(fontsize=14)
# Export scatter built from the helper's month-level table. Points are
# coloured by cluster, shaped by state and sized by how many rows of the
# table belong to the same cluster.
t = helper.get_additional_col_month(df, 'export')
fig, ax = plt.subplots(figsize=(10, 5))
# Attach each row's cluster size as a 'count' column.
X = pd.merge(t, t.groupby(['clust']).size().reset_index(name='count'), on='clust')
mkr_dict = {'NSW1': 'D', 'QLD1': 'v', 'SA1': 'o', 'VIC1': '^', 'TAS1': '*'}
g = sns.scatterplot(x='datetime', y='export', hue='clust_category', style='state', size='count',
                    markers=mkr_dict, data=X, alpha=0.5, legend='brief')

# Rebuild the legend from entries 1..21 and 29 onwards.
# NOTE(review): entries 0 and 22..28 are presumably the section headers plus
# the 'count' size samples seaborn adds for legend='brief' -- re-check the
# hard-coded indices if the data or seaborn version changes.
handles, labels = ax.get_legend_handles_labels()
labels = labels[1:22] + labels[29:]
handles = handles[1:22] + handles[29:]
g.legend(handles=handles, labels=labels, fontsize=12,
         loc=4, ncol=8, bbox_to_anchor=(1.05, -0.6), title='Clusters/State', title_fontsize=12)

# Set x-axis label and tick label size
plt.xlabel('Time (in days)', fontsize=14)
a = plt.xticks(fontsize=14)
# Set y-axis label and tick label size (this was a second plt.xticks call,
# which left the y tick labels at their default size)
plt.ylabel('Export', fontsize=14)
a = plt.yticks(fontsize=14)
# Scatter of the two-column array stored in the pickle, coloured by cluster.
# Open the pickle with a context manager so the file handle is closed
# deterministically. pickle can execute arbitrary code on load, so only point
# this at model files produced by this project.
with open('models/ftz_daily_5.pkl', 'rb') as model_file:
    Z = pd.DataFrame(pickle.load(model_file))
Z = Z.rename(columns={0: "dim1", 1: "dim2"})

# Attach the state and cluster labels from df.
# NOTE(review): these assignments align on the index, so they assume the
# pickled rows are in the same order as df and that df has a default
# 0..n-1 index -- confirm against how the pickle was produced.
Z['state'] = df['state'].astype('str').astype('category')
Z['clust_category'] = df['clust_category']

g = sns.scatterplot(x="dim1", y="dim2", hue="clust_category", data=Z, edgecolors='none', alpha=0.1)
# Place the legend outside the axes so it does not cover the points.
g.legend(loc=4, ncol=5, bbox_to_anchor=(1.5, -0.5))
# Set x-axis label
plt.xlabel('Dimension 1')
# Set y-axis label
plt.ylabel('Dimension 2')
Text(0, 0.5, 'Dimension 2')
import random
import warnings

# Pairwise scatter/KDE matrix of all energy and additional columns, coloured
# by cluster, drawn on a per-state random sample of df.

# Silences every warning from here on for the rest of the session.
warnings.filterwarnings('ignore')

# Font sizes are very large because each pairplot panel is 10 inches high.
SMALL_SIZE = 100
MEDIUM_SIZE = 120
energy_cols = ['wind', 'solar', 'solar_rooftop', 'blackcoal', 'browncoal', 'gas', 'diesel', 'kerosene', 'hydro', 'bagasse', 'batterys', 'net_import']
# Overrides the earlier notebook-level additional_cols for this plot (no 'rrp').
additional_cols = ['export', 'demand']
plt.rc('font', size=SMALL_SIZE)          # controls default text sizes
plt.rc('axes', titlesize=SMALL_SIZE)     # fontsize of the axes title
plt.rc('axes', labelsize=MEDIUM_SIZE)    # fontsize of the x and y labels
plt.rc('xtick', labelsize=SMALL_SIZE)    # fontsize of the tick labels
plt.rc('ytick', labelsize=SMALL_SIZE)    # fontsize of the tick labels
plt.rc('legend', fontsize=MEDIUM_SIZE, markerscale=10)  # legend fontsize

# Sample at most 5000 rows per state (without replacement) to keep the
# pairplot tractable. The draw is unseeded, so the sample differs between runs.
MAX_ROWS_PER_STATE = 5000
sampled_frames = []
for s in df.state.unique():
    X_sub = df[df.state == s]
    max_sample = min(MAX_ROWS_PER_STATE, X_sub.shape[0])
    X_sub = X_sub.iloc[random.sample(range(0, X_sub.shape[0]), max_sample), :]
    sampled_frames.append(X_sub)
# Concatenate once instead of growing a DataFrame inside the loop; fall back
# to an empty frame when df has no rows (pd.concat rejects an empty list).
Xplot = pd.concat(sampled_frames) if sampled_frames else pd.DataFrame()

pp = sns.pairplot(Xplot[energy_cols + additional_cols + ['clust_category']], hue='clust_category', diag_kind='kde',
                  plot_kws={'alpha': 0.5, 'edgecolor': 'k'}, height=10)
plt.show()